Make the prediction grid

Author

Max Lindmark

Published

2024-04-23

Intro

Make an evenly spaced UTM prediction grid with all spatially varying covariates for the larval size data

library(tidyverse)
library(tidylog)
library(tidync)
library(tidyterra)
library(sp)
library(devtools)
library(RCurl)
library(sdmTMB)
library(ncdf4)
library(terra)
library(viridis)
# devtools::install_github("seananderson/ggsidekick") # not on CRAN 
library(ggsidekick)
theme_set(theme_sleek())

# Project root; here::here() anchors all paths to the project directory
home <- here::here()

# Alternative: source the map helpers straight from GitHub
# source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")
source(file.path(home, "R/functions/map-plot.R"))
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.

Read data and depth-raster

# Read data
d <- readr::read_csv(paste0(home, "/data/clean/larval_size.csv"))
Rows: 47020 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): haul_id, species, period
dbl (11): length_mm, year, day, month, lat, lon, temp_obs, yday, X, Y, temp

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Make the grid

First make a grid for the area, then subset that based on the extent of the size data

# Build a convex hull around the data locations and turn it into an sp
# polygon; it is used below to clip the prediction grid to the spatial
# extent of the size data.
x <- d$X
y <- d$Y
hull_idx <- chull(x, y)

# Hull vertices; repeat the first vertex so the polygon is closed
coords <- cbind(x[hull_idx], y[hull_idx])
coords <- rbind(coords, coords[1, ])

# Quick visual check of the hull. Fixed: the original plotted
# coords[, 1] ~ coords[, 2], i.e. easting (X) on the vertical axis;
# plot column 2 (Y) against column 1 (X) so the map is oriented correctly.
plot(coords[, 2] ~ coords[, 1]) # plot data

sp_poly <- sp::SpatialPolygons(
  list(sp::Polygons(list(sp::Polygon(coords)), ID = 1))
)

sp_poly_df <- sp::SpatialPolygonsDataFrame(sp_poly,
                                           data = data.frame(ID = 1))
# Grid resolution (UTM units are km here, so 3 x 3 km cells)
cell_width <- 3

# Full rectangular grid over the data extent, replicated for every year
pred_grid <- expand.grid(
  X = seq(min(d$X), max(d$X), by = cell_width),
  Y = seq(min(d$Y), max(d$Y), by = cell_width),
  year = seq(min(d$year), max(d$year))
)

# Sanity plot: one year of the (not yet clipped) grid
ggplot(filter(pred_grid, year == 2019), aes(X, Y)) +
  geom_point(size = 0.1) +
  theme_void() +
  coord_sf()

# Promote the grid to a SpatialPointsDataFrame so it can be overlaid
# on the hull polygon
sp::coordinates(pred_grid) <- c("X", "Y")

# sp::over() returns NA for points outside the polygon, so `inside`
# is TRUE for grid cells that fall within the convex hull
inside <- !is.na(sp::over(pred_grid, as(sp_poly_df, "SpatialPolygons")))

# Keep only the cells inside the hull...
pred_grid <- pred_grid[inside, ]

# ...and go back to a plain data frame for the dplyr steps below
pred_grid <- as.data.frame(pred_grid)

# Map the clipped grid for a single year (X/Y are in km; scale to m)
plot_map +
  geom_point(data = filter(pred_grid, year == 1999),
             aes(X * 1000, Y * 1000),
             size = 0.001, alpha = 0.5)

# Add lon/lat by projecting the UTM zone 32 coordinates back to WGS84
# https://stackoverflow.com/questions/30018098/how-to-convert-utm-coordinates-to-lat-and-long-in-r
utm_m <- pred_grid %>%
  dplyr::select(X, Y) %>%
  mutate(X = X * 1000, Y = Y * 1000) %>%
  as.matrix()

utm_vect <- vect(utm_m, crs = "+proj=utm +zone=32 +datum=WGS84  +units=m")
lonlat <- geom(project(utm_vect, "+proj=longlat +datum=WGS84"))[, c("x", "y")]

pred_grid$lon <- lonlat[, "x"]
pred_grid$lat <- lonlat[, "y"]

Depth and crop

# Add depth now to remove islands and remaining land
# https://gis.stackexchange.com/questions/411261/read-multiple-layers-raster-from-ncdf-file-using-terra-package
# https://emodnet.ec.europa.eu/geoviewer/
dep_raster <- terra::rast(paste0(home, "/data/covariates/depth/Mean depth natural colour (with land).nc"))
class(dep_raster)
[1] "SpatRaster"
attr(,"package")
[1] "terra"
crs(dep_raster, proj = TRUE)
[1] "+proj=longlat +datum=WGS84 +no_defs"
plot(dep_raster)

# Extract elevation at each grid cell and flip the sign so that depth
# below the surface is positive
depth_vals <- terra::extract(dep_raster,
                             dplyr::select(pred_grid, lon, lat))$elevation
pred_grid$depth <- -depth_vals

# pred_grid <- pred_grid %>% drop_na(depth)

# FIXME
# Hack to get rid of Denmark and other land with incomplete bathy:
# west of 8.8 degrees E, cells with missing bathymetry get depth 0,
# remaining NAs are dropped.
# NOTE(review): the 0-depth cells survive drop_na() — confirm intended.
# ggplot(pred_grid, aes(X*1000, Y*1000, fill = depth)) +
#   geom_raster()
pred_grid <- pred_grid %>%
  mutate(depth = ifelse(is.na(depth) & lon < 8.8, 0, depth)) %>%
  drop_na(depth)

# ggplot(pred_grid2, aes(X*1000, Y*1000, fill = depth)) + 
#   geom_raster()

# FIXME: tidy up this prediction grid by removing the super coastal samples
plot_map + 
  geom_raster(data = pred_grid, aes(X*1000, Y*1000, fill = depth), size = 0.001) + 
  #facet_wrap(~year) + 
  geom_sf()
Warning in geom_raster(data = pred_grid, aes(X * 1000, Y * 1000, fill = depth),
: Ignoring unknown parameters: `size`

Temperature

# Satellite-derived temperatures
# https://data.marine.copernicus.eu/product/SST_BAL_SST_L4_REP_OBSERVATIONS_010_016/description
# https://data.marine.copernicus.eu/product/SST_BAL_SST_L4_REP_OBSERVATIONS_010_016/download

print(nc_open(paste0(home, "/data/covariates/sst/DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_1711802008633.nc")))
File /Users/maxlindmark/Dropbox/Max work/R/larval-sizes/data/covariates/sst/DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_1711802008633.nc (NC_FORMAT_NETCDF4):

     1 variables (excluding dimension variables):
        float analysed_sst[longitude,latitude,time]   (Contiguous storage)  
            units: kelvin
            _FillValue: NaN
            standard_name: sea_surface_foundation_temperature
            long_name: Analysed sea surface temperature

     3 dimensions:
        latitude  Size:355 
            standard_name: latitude
            long_name: Latitude
            units: degrees_north
            unit_long: Degrees North
            axis: Y
            valid_min: 48
            valid_max: 66
        longitude  Size:342 
            standard_name: longitude
            long_name: Longitude
            units: degrees_east
            unit_long: Degrees East
            axis: X
        time  Size:1462 
            standard_name: time
            long_name: Time
            units: seconds since 1970-01-01 00:00:00
            calendar: gregorian
            axis: T

    11 global attributes:
        Conventions: CF-1.11
        title: Baltic Sea SST analysis, daily reprocessed level 4 analysis
        institution: Danish Meteorological Institute, DMI
        source: ESA SST CCI, C3S and  CMEMS FMI and SMHI Sea ice concentration
        history: Version 1.0
        references: Høyer, J. L. and She, J., Optimal interpolation of sea surface temperature for the North Sea and Baltic Sea,  J. Mar. Sys., Vol 65, 1-4, pp. 176-189, 2007, Høyer, J. L., Le Borgne, P., & Eastwood, S. (2014). A bias correction method for Arctic satellite sea surface temperature observations. Remote Sensing of Environment, 146, 201-213.
        comment: IN NO EVENT SHALL DMI OR ITS REPRESENTATIVES BE LIABLE FOR ANY DAMAGES WHATSOEVER INCLUDING, WITHOUT LIMITATION, SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR DAMAGES FOR LOSS OF BUSINESS PROFITS OR SAVINGS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION OR OTHER PECUNIARY LOSS ARISING OUT OF THE USE OF OR THE BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION OR OTHER PECUNIARY LOSS ARISING OUT OF THE USE OF OR THE INABILITY TO USE THIS DMI PRODUCT, EVEN IF DMI HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION SHALL APPLY TO CLAIMS OF PERSONAL INJURY TO THE EXTENT PERMITTED BY LAW. SOME COUNTRIES OR STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF LIABILITY FOR CONSEQUENTIAL, SPECIAL, INDIRECT, INCIDENTAL DAMAGES AND, ACCORDINGLY, SOME PORTIONS OF THESE LIMITATIONS MAY NOT APPLY TO YOU. BY USING THIS PRODUCT, YOU HAVE ACCEPTED THAT THE ABOVE LIMITATIONS OR THE MAXIMUM LEGALLY APPLICABLE SUBSET OF THESE LIMITATIONS APPLY TO YOUR USE OF THIS PRODUCT. WARNING Some applications are unable to properly handle signed bytevalues. If values are encountered > 127, please subtract 256 from this reported value
        subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
        subset:productId: SST_BAL_SST_L4_REP_OBSERVATIONS_010_016
        subset:datasetId: DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_202012
        subset:date: 2024-03-30T12:33:28.633Z
# Mean January SST (degrees C) per year x grid cell from one NetCDF file.
# analysed_sst is in kelvin (see the nc_open() printout above), hence
# the -273.15. `.groups = "drop"` replaces ungroup() and also silences
# the summarise() grouping message.
read_january_sst <- function(file) {
  tidync(file) %>%
    hyper_tibble() %>%
    mutate(date = as_datetime(time, origin = "1970-01-01")) %>%
    mutate(month = month(date),
           day = day(date),
           year = year(date)) %>%
    filter(month == 1) %>%
    group_by(year, longitude, latitude) %>%
    summarise(sst = mean(analysed_sst) - 273.15, .groups = "drop")
}

# The Copernicus download is split across several files; read them all
# with one helper instead of eight copy-pasted pipelines
sst_ids <- c("1711802008633", "1711802082358", "1711802166794",
             "1711802239078", "1711802304331", "1711802375462",
             "1711802434416", "1711802668854")

sst_files <- paste0(
  home, "/data/covariates/sst/DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_",
  sst_ids, ".nc")

temp_tibble <- sst_files %>%
  map(read_january_sst) %>%
  bind_rows()

hist(temp_tibble$sst)

unique(temp_tibble$year)
 [1] 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
[16] 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
[31] 2021 2022 2023
# Loop over years: rasterise that year's SST field and extract it at the
# prediction-grid cells (the SST layer changes annually, so this has to
# be done per year).
temp_list <- list()

for (i in unique(pred_grid$year)) {

  d_sub <- filter(pred_grid, year == i)
  temp_tibble_sub <- filter(temp_tibble, year == i)

  # Convert to raster (columns 2:3 of temp_tibble are longitude/latitude)
  temp_raster <- as_spatraster(temp_tibble_sub, xycols = 2:3,
                               crs = "WGS84", digits = 2)

  # Optional visual check — note a ggplot object must be print()ed to
  # render inside a loop (the original ggplot call here did nothing):
  # print(ggplot() + geom_spatraster(data = temp_raster$sst, aes(fill = sst)))

  # Extract from raster at the grid-cell lon/lat positions
  d_sub$temp <- terra::extract(temp_raster$sst,
                               d_sub %>% dplyr::select(lon, lat))$sst

  # Index by year as a *name*: temp_list[[1991]] would silently pad the
  # list with thousands of NULL elements
  temp_list[[as.character(i)]] <- d_sub

}

pred_grid <- bind_rows(temp_list)

Plot temperature

# Facet map of January SST by year
# NOTE(review): 2011 is excluded from the map — confirm why
plot_map_fc +
  geom_raster(data = filter(pred_grid, year != 2011),
              aes(X * 1000, Y * 1000, fill = temp)) +
  facet_wrap(~year, ncol = 7) +
  theme_facet_map(base_size = 10) +
  geom_sf() +
  scale_fill_viridis(option = "magma", name = "January SST (°C)")

ggsave(paste0(home, "/figures/sst.pdf"), width = 17, height = 15, units = "cm")


# Mean temperature in the domain over time
annual_sst <- pred_grid %>%
  drop_na(temp) %>%
  summarise(temp = mean(temp), .by = "year")

ggplot(annual_sst, aes(year, temp)) +
  geom_point(size = 1.5, color = "gray10") +
  geom_smooth(method = "gam", formula = y ~ s(x, k = 8), color = "tomato3") +
  labs(y = "Mean January SST (°C)", x = "Year")

ggsave(paste0(home, "/figures/sst_mean.pdf"), width = 11, height = 11, units = "cm")

Chlorophyll

# Satellite-derived chlorophyll
# https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/description
# https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/download

# This database reports chl predictions at different depths in the period 1993-2022. 
# We need to load and finally bind this data in portions, to avoid size limits. 

# Mean January chlorophyll per year x grid cell from one NetCDF file.
# Same pattern as the SST helper: daily values, January only, averaged
# per year and cell. Note the mean is taken over all depth layers as
# well as days (the product reports chl at several depths).
read_january_chl <- function(file) {
  tidync(file) %>%
    hyper_tibble() %>%
    mutate(date = as_datetime(time, origin = "1970-01-01")) %>%
    mutate(month = month(date),
           day = day(date),
           year = year(date)) %>%
    filter(month == 1) %>%
    select(-time, -date) %>%
    group_by(year, longitude, latitude) %>%
    rename(lon = longitude, lat = latitude) %>%
    summarise(chl = mean(chl), .groups = "drop")
}

# Four download chunks covering 1993-2022 (size limits); one helper call
# each instead of four copy-pasted pipelines
chl_files <- paste0(
  home, "/data/covariates/chlorophyll/cmems_mod_glo_bgc_my_0.25_P1D-m_",
  c("1713795858492_01011993_12311999",
    "1713795803251_01012000_12312008",
    "1713795716234_01012009_12312016",
    "1713795613611_01012017_12312022"),
  ".nc")

chl_tibble <- chl_files %>%
  map(read_january_chl) %>%
  bind_rows()

hist(chl_tibble$chl)

unique(chl_tibble$year)
 [1] 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
[16] 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
# Loop over years: rasterise that year's chlorophyll field and extract
# it at the prediction-grid cells.
chl_list <- list()

# Iterate over the years present in the grid, not in the chl data: the
# original loop over unique(chl_tibble$year) silently DROPPED pred_grid
# rows for any year without chl coverage (chl spans 1993-2022, SST
# 1991-2023). Rows for uncovered years are now kept with chl = NA.
for (i in unique(pred_grid$year)) {

  d_sub <- filter(pred_grid, year == i)
  chl_tibble_sub <- filter(chl_tibble, year == i)

  if (nrow(chl_tibble_sub) > 0) {
    # Convert to raster (columns 2:3 of chl_tibble are lon/lat)
    chl_raster <- as_spatraster(chl_tibble_sub, xycols = 2:3,
                                crs = "WGS84", digits = 2)

    # Extract from raster at the grid-cell lon/lat positions
    d_sub$chl <- terra::extract(chl_raster$chl,
                                d_sub %>% dplyr::select(lon, lat))$chl
  } else {
    # No chlorophyll data for this year: keep the rows, flag with NA
    d_sub$chl <- NA_real_
  }

  # Index by year as a *name* to avoid padding the list with NULLs
  chl_list[[as.character(i)]] <- d_sub

}

pred_grid <- bind_rows(chl_list)

# Facet map of January chlorophyll by year
# NOTE(review): 2011 is excluded from the map — confirm why
plot_map_fc +
  geom_raster(data = filter(pred_grid, year != 2011),
              aes(X * 1000, Y * 1000, fill = chl)) +
  facet_wrap(~year, ncol = 7) +
  theme_facet_map(base_size = 10) +
  geom_sf() +
  scale_fill_viridis(option = "viridis", name = "January chl (mg/m^3)")

ggsave(paste0(home, "/figures/chl.pdf"), width = 17, height = 15, units = "cm")

# Mean chlorophyll in the domain over time
annual_chl <- pred_grid %>%
  drop_na(chl) %>%
  summarise(chl = mean(chl), .by = "year")

ggplot(annual_chl, aes(year, chl)) +
  geom_point(size = 1.5, color = "gray10") +
  geom_smooth(method = "gam", formula = y ~ s(x, k = 8), color = "tomato3") +
  labs(y = "Mean January chl (mg/m^3)", x = "Year")

ggsave(paste0(home, "/figures/chl_mean.pdf"), width = 11, height = 11, units = "cm")

Save

write_csv(pred_grid, file = paste0(home, "/data/clean/pred_grid.csv"))